import json
import numpy as np
import pandas as pd
from collections import defaultdict, Counter

# Seed NumPy's global random state before the shuffle below, which is given no
# random_state of its own.
np.random.seed(11)

# Read the full CSV and put its rows in random order (frac=1 samples every row).
df = pd.read_csv("rwwd_full.csv").sample(frac=1)
# Discard the shuffled row labels and renumber the rows from 0.
df = df.reset_index(drop=True)

def distribution(df):
    """Print how often each value occurs in the ``anxiety`` column of ``df``.

    The counts are printed as a ``collections.Counter``; nothing is returned.
    """
    tally = Counter()
    for label in df["anxiety"]:
        tally[label] += 1
    print(tally)

# Keep only the label column and the text column.
df = df[["anxiety", "text_long"]]

# Positional split: the first 2000 rows, and every row after them.
train_dev, test = df.iloc[:2000], df.iloc[2000:]

# Write both parts with to_csv's default options, then print the label counts
# of each part in the same order.
for split, path in ((train_dev, "train_dev.csv"), (test, "test.csv")):
    split.to_csv(path)
for split in (train_dev, test):
    distribution(split)
